Subsetting States

Wastewater Data from Biobot Analytics

biobot_link <- "https://raw.githubusercontent.com/biobotanalytics/covid19-wastewater-data/master/wastewater_by_county.csv"


w_data <- read_csv(biobot_link)%>% 
  filter(sampling_week >= ymd("2021-03-01") &  sampling_week <= ymd("2022-03-01")) %>%
  mutate(fips = factor(fipscode)) %>%
  select(-fipscode)



# counties with the most wastewater data
w_data %>% 
  group_by(name,fips) %>%
  summarize(n=n()) %>%
  arrange(desc(n))
# states with the most wastewater data
w_data %>% 
  group_by(name,fips,state) %>%
  summarize(n=n()) %>% 
  group_by(state) %>% 
  summarize(obs = sum(n)) %>% 
  arrange(desc(obs))
####################################################################
# COMPARE WASTEWATER TO CORRECTED CASES
####################################################################

state <- "ma"
priors_versions <- c("v2", "v6", "v7", "v10")


state_corrected_path <-  paste0(here::here(), "/data/adj_cases_output_biweekly/", state, "/")


versions <- tibble(
  version = c("v2", "v6", "v7", "v8","v9", "v10"),
  vlabel = c("Original",
             "Distribution about Smoothed Empirical Beta",
             "Distribution about Smoothed Empirical Beta\n and Distribution about Smoothed Empirical P(S|untested)",
             "Distribution about Smoothed Empirical Beta \n and Smoothing County-level Test Positivity Rate",
             "Distribution about Smoothed Empirical Beta \n and Smoothing County-level Test Positivity Rate\n and Updated Screening Test Positivity",
             "Distribution about Smoothed Empirical P(S|untested)"))


versions <- versions %>%
  mutate(vlabel= gsub("Distribution about", "", vlabel)) %>%
  mutate(vlabel = gsub("Empirical", "Emp.", vlabel))%>%
  mutate(vlabel = factor(vlabel, levels = .$vlabel))



################################
# ESTIMATED
################################
dates <- readRDS(paste0(here::here(), "/data/date_to_biweek.RDS"))

corrected <- map_df(priors_versions, ~readRDS(
        paste0(state_corrected_path, "adj_cases_",
               .x, 
               ".RDS")) %>% 
          mutate(version = .x)) %>%
  rename(biweek = week) %>%
  left_join(dates)

corrected <- corrected %>%
  left_join(versions)


pal <- c("#74A09F", "#A0748B", 
         "#748BA0", "#A08974",
         "#D49E9F", "#D4B89E", "#AFCFE5")
compare_county_wastewater <- function(county_fips, end_date ="2021-12-15") {
  
  county_name <- w_data %>% 
    filter(fips == county_fips) %>%
    pull(name)
    
  custom_title = paste0("Comparing Wastewater Concentration to Corrected Estimates for ",
                 county_name, "\nFIPS: ", county_fips)
  
  end_date <- ymd(end_date)
  
  w_data_for_county <- w_data %>%
    rename(date = sampling_week) %>%
    filter(fips == county_fips & date <= end_date)
  
  if(nrow(w_data_for_county) == 0) {
    message(paste0("No wastewater data for FIPS: ", 
                   county_fips));
    return()}
  
  begin_date <- min(w_data_for_county$date)
  
  
  adj <- corrected %>%filter(fips == county_fips & date <= end_date) %>% pull(exp_cases_ub) %>% max()
  
  conc_max <- max(w_data_for_county$effective_concentration_rolling_average) 
  
  adj <- conc_max/adj
  
  corrected %>%
    filter(fips == county_fips  & date >= begin_date & date <= end_date) %>%
    ggplot() +
    geom_ribbon(aes(x = date, 
               ymin = exp_cases_lb*adj,
               ymax = exp_cases_ub*adj,
               fill = vlabel),
               alpha = .7) +
    geom_line(data = w_data_for_county, 
              aes(x = date, y =effective_concentration_rolling_average ),
              color = "#DB4048",
              size = 1.1) +
    geom_point(data = w_data_for_county, 
              aes(x = date, y =effective_concentration_rolling_average ),
              color = "#DB4048",
              alpha = .5,
              size = 1.2) +
    facet_wrap(~vlabel) +
    scale_fill_manual(values = pal) +
    theme_bw() +
    theme(
      legend.position = "none",
      plot.title = element_text(face = "bold", size = 16, hjust = .5),
      axis.title = element_text(size = 18),
      strip.text = element_text(size = 14)
    )+
    scale_y_continuous(sec.axis = sec_axis(~./adj,
                                       name = "Corrected Infection Estimates",
                                       labels = comma),
                       labels = comma) +
    labs(y = "Effective Concentration Rolling Average",
         title = custom_title) +
    scale_x_date(date_labels = "%b %Y")
}

 # compare_county_wastewater(county_fips = "25023")